## Replication code for "Strategic Government Communication about Performance"
## 18 August 2020

## ------------ ##
#### Preamble ####
## ------------ ##
library(tidyverse)
library(tm)
library(RCurl)
library(topicmodels)
library(stm)
library(tools)
library(xtable)
library(RTextTools)
library(stargazer)
library(foreign)
library(lfe)
library(broom)

#### Pre-work ###

## Running main topic model:
K=45 # Other numbers of topics in appendix

time_start_45 <- Sys.time()
PRfit45 <- stm(out$documents, out$vocab, K=45,
               prevalence=~metainfo$PRcity, max.em.its=200,
               data=out$meta, seed=02139)
time_end_45 <- Sys.time()
difftime(time_end_45,time_start_45)
# save(PRfit45,file="stm_output.RData")



## ------------- ##
#### Read data ####
## ------------- ##

# Load document text + info in dataframe:
metainfo <- read_rds("metainfo.rds")
nrow(metainfo) # 111892
writeLines(prettyNum(nrow(metainfo),big.mark = ","),sep = "%",con = "Tables/n_documents.tex")


# Load output from STM:
load("stm_output.RData")


## Create time variables:
metainfo$PRmonth <- as.character(format(metainfo$PRdate,"%m"))
metainfo$PRyear <- as.character(format(metainfo$PRdate,"%Y"))
metainfo$PRyearmonth <- paste0(metainfo$PRyear,metainfo$PRmonth)
metainfo$quarter <- lubridate::quarter(metainfo$PRdate,with_year = T)

metainfo$lag_year <- as.numeric(metainfo$PRyear)-1
metainfo$lag_1month_month <- as.character(format(metainfo$PRdate-30,"%m"))
metainfo$lag_1month_year <- as.character(format(metainfo$PRdate-30,"%Y"))


## Create main DVs:
dev_45 <- c(31) # after viewing topics and reading selected documents
crime_45 <- c(23,16) # both these topics (see Table A1)

# DVs from STM document proportions:
metainfo$theta_dev_45 <- rowSums(data.frame(PRfit45$theta[,dev_45]))
metainfo$theta_crime_45 <- rowSums(data.frame(PRfit45$theta[,crime_45]))

# DVs using dictionary approach:
# create dictionary as subset of top 100 words with highest prob of econ dev STM-identified topic:
econ_dict <- data.frame(t(labelTopics(PRfit45,n=200)$prob))[,dev_45]
# View(econ_dict)
econ_dict_manual <- c("business", "businesses",
                      "economic", "economy",
                      "jobs", "job",
                      "development",
                      "growth", "grow", "growing",
                      "create", "creating","creation",
                      "investment",
                      "opportunities",
                      "workforce", "working",
                      "company",
                      "employ","employment","employing"
)

crime_dict <- data.frame(t(labelTopics(PRfit45,n=200)$prob))[,crime_45]
# View(crime_dict)
crime_dict_manual<- c("police","officer","officers","precinct",
                      "victim","homicide","homicides","crime","crimes","burglary","burglaries","shooting",
                      "investigation","detective","detectives","surveillance",
                      "suspect","suspects","robbery","arrest","arrested","charged","warrant"
)

count_in_dict <- function(document, dictionary){
  thisstring <- str_split(str_squish(document)," ",simplify=T)
  if(length(thisstring)>0){
    count_in_dict <- apply(thisstring,1,FUN = function(x){as.numeric(x %in% dictionary)})
    if(length(count_in_dict)>1){
      all_in_dict <- apply(count_in_dict,2,sum)
    } else {
      all_in_dict <- count_in_dict
    }
  } else{
    all_in_dict <- NA
  }
  return(all_in_dict)
}

metainfo$count_in_econdevdict <- unlist(lapply(metainfo$content,count_in_dict, dictionary = econ_dict_manual))
metainfo$count_in_crimedict <- unlist(lapply(metainfo$content,count_in_dict, dictionary = crime_dict_manual))

## DVs with different thresholds:
metainfo$in_econdevdict5 <- as.numeric(metainfo$count_in_econdevdict>5)
metainfo$in_econdevdict6 <- as.numeric(metainfo$count_in_econdevdict>6)
metainfo$in_econdevdict7 <- as.numeric(metainfo$count_in_econdevdict>7)
metainfo$in_econdevdict8 <- as.numeric(metainfo$count_in_econdevdict>8)
metainfo$in_econdevdict9 <- as.numeric(metainfo$count_in_econdevdict>9)
metainfo$in_econdevdict10 <- as.numeric(metainfo$count_in_econdevdict>10)
metainfo$in_econdevdict11 <- as.numeric(metainfo$count_in_econdevdict>11)
metainfo$in_econdevdict12 <- as.numeric(metainfo$count_in_econdevdict>12)
metainfo$in_econdevdict13 <- as.numeric(metainfo$count_in_econdevdict>13)
metainfo$in_econdevdict14 <- as.numeric(metainfo$count_in_econdevdict>14)
metainfo$in_econdevdict15 <- as.numeric(metainfo$count_in_econdevdict>15)


metainfo$in_crimedict5 <- as.numeric(metainfo$count_in_crimedict>5)
metainfo$in_crimedict6 <- as.numeric(metainfo$count_in_crimedict>6)
metainfo$in_crimedict7 <- as.numeric(metainfo$count_in_crimedict>7)
metainfo$in_crimedict8 <- as.numeric(metainfo$count_in_crimedict>8)
metainfo$in_crimedict9 <- as.numeric(metainfo$count_in_crimedict>9)
metainfo$in_crimedict10 <- as.numeric(metainfo$count_in_crimedict>10)
metainfo$in_crimedict11 <- as.numeric(metainfo$count_in_crimedict>11)
metainfo$in_crimedict12 <- as.numeric(metainfo$count_in_crimedict>12)
metainfo$in_crimedict13 <- as.numeric(metainfo$count_in_crimedict>13)
metainfo$in_crimedict14 <- as.numeric(metainfo$count_in_crimedict>14)
metainfo$in_crimedict15 <- as.numeric(metainfo$count_in_crimedict>15)




## Crime data:
crime_bycity <- read_rds("nibrs_monthly_city.rds")


# Merge crime data with text data:
metainfo <- metainfo %>%
  mutate(PRyear = as.numeric(PRyear))
metainfo_crime <- left_join(filter(metainfo, !is.na(PRmonth) & !is.na(PRyear)), crime_bycity,
                            by=c("PRcity"="PRcity","PRmonth"="inc_month","PRyear"="inc_year"))

metainfo_crime <- metainfo_crime %>%
  mutate(nviolent_rate_delta = nviolent_rate - nviolent_rate_lag,
         nviolent_rate_deltalog = log1p(nviolent_rate) - log1p(nviolent_rate_lag)
  )

## Economic conditions data:
qcew <- read_rds("qcew.rds")

# merge to document data:
metainfo_econ <- metainfo %>%
  filter(PRyear>1989 & !is.na(quarter) & !is.na(msa_fips))
metainfo_econ <- left_join(metainfo_econ, qcew,
                           by=c("quarter"="year_q","msa_fips"="msa_fips"))

metainfo_econ <- metainfo_econ %>%
  mutate(quarterly_wages_perworker_delta = quarterly_wages_perworker - quarterly_wages_perworker_lag,
         quarterly_wages_perworker_deltalog = log(quarterly_wages_perworker) - log(quarterly_wages_perworker_lag),
         quarterly_wages_perworker_deltalead = quarterly_wages_perworker_lead - quarterly_wages_perworker,
         quarterly_wages_perworker_log = log(quarterly_wages_perworker)
  )

metainfo_econ$city_quarter <- paste0(metainfo_econ$PRcity,metainfo_econ$quarter)


## ------------ ##
#### Figure 1 ####
## ------------ ##
# only using those with valid date for timeline plot:
metainfo2 <- metainfo %>%
  filter(!is.na(PRdate))

pdf("Figures/timeseries_zoom.pdf",width=8,height=12)
par(mar=c(3,8,0,3))
plot(NULL, xlim=c(as.Date("1998-12-31"),as.Date("2016-12-31")),
     ylim=c(min(as.numeric(factor(metainfo2$PRcity))),max(as.numeric(factor(metainfo2$PRcity)))),
     axes=F,xlab="Date",ylab="")
abline(h=c(min(as.numeric(factor(metainfo2$PRcity)))+0.5:max(as.numeric(factor(metainfo2$PRcity)))-0.5),
       col="lightgrey")
points(metainfo2$PRdate,
       jitter((max(as.numeric(factor(metainfo2$PRcity)))+1)-as.numeric(factor(metainfo2$PRcity))),
       pch=".",cex=1)
axis(1,at=seq(as.Date("1998-12-31"),as.Date("2016-12-31"),by=365),
     labels=F,tick=T,line = -1)
text(x=seq(as.Date("1998-12-31"),as.Date("2016-12-31"),by=365),
     y=-0.7,
     cex = 0.7,
     labels=format(seq(as.Date("1998-12-31"),as.Date("2016-12-31"),by=365),"%b. '%y"),
     srt=45,pos=1,xpd=T)
axis(2,at=max(as.numeric(factor(metainfo2$PRcity))):1,
     labels=unique(factor(metainfo2$PRcity)),
     cex.axis=0.7,las=1)
axis(4,at=length(unique(metainfo2$PRcity)):1,
     labels=tapply(metainfo2$PRdate,metainfo2$PRcity,length),
     cex.axis=0.7,las=1)
axis(4,at=length(unique(metainfo2$PRcity))+1.0,
     labels="n =",cex.axis=0.7,las=1,tick=F)
dev.off()


#### Economy: aggregation ####

citysumm_econ_quarter <- metainfo_econ %>%
  group_by(PRcity, msa_fips, PRyear, quarter) %>%
  summarize(
    theta_dev_45 = mean(theta_dev_45,na.rm=T),
    theta_crime_45 = mean(theta_crime_45,na.rm=T),
    prop_econ_gt5words = mean(in_econdevdict5,na.rm=T),
    prop_econ_gt6words = mean(in_econdevdict6,na.rm=T),
    prop_econ_gt7words = mean(in_econdevdict7,na.rm=T),
    prop_econ_gt8words = mean(in_econdevdict8,na.rm=T),
    prop_econ_gt9words = mean(in_econdevdict9,na.rm=T),
    prop_econ_gt10words = mean(in_econdevdict10,na.rm=T),
    prop_econ_gt11words = mean(in_econdevdict11,na.rm=T),
    prop_econ_gt12words = mean(in_econdevdict12,na.rm=T),
    prop_econ_gt13words = mean(in_econdevdict13,na.rm=T),
    prop_econ_gt14words = mean(in_econdevdict14,na.rm=T),
    prop_econ_gt15words = mean(in_econdevdict15,na.rm=T),
    
    prop_crime_gt5words = mean(in_crimedict5,na.rm=T),
    prop_crime_gt6words = mean(in_crimedict6,na.rm=T),
    prop_crime_gt7words = mean(in_crimedict7,na.rm=T),
    prop_crime_gt8words = mean(in_crimedict8,na.rm=T),
    prop_crime_gt9words = mean(in_crimedict9,na.rm=T),
    prop_crime_gt10words = mean(in_crimedict10,na.rm=T),
    prop_crime_gt11words = mean(in_crimedict11,na.rm=T),
    prop_crime_gt12words = mean(in_crimedict12,na.rm=T),
    prop_crime_gt13words = mean(in_crimedict13,na.rm=T),
    prop_crime_gt14words = mean(in_crimedict14,na.rm=T),
    prop_crime_gt15words = mean(in_crimedict15,na.rm=T),
    
    num = n()
  )


citysumm_econ_quarter <- citysumm_econ_quarter %>%
  filter(quarter>1989.4)
citysumm_econ_quarter <- left_join(citysumm_econ_quarter, qcew,
                                   by=c("quarter"="year_q","msa_fips"="msa_fips"))

citysumm_econ_quarter <- mutate(citysumm_econ_quarter,
                                quarterly_wages_perworker_delta = quarterly_wages_perworker - quarterly_wages_perworker_lag,
                                quarterly_wages_perworker_deltalog = log(quarterly_wages_perworker) - log(quarterly_wages_perworker_lag),
                                quarterly_wages_perworker_deltalead = quarterly_wages_perworker_lead - quarterly_wages_perworker,
                                quarterly_wages_perworker_log = log(quarterly_wages_perworker)
)

#### Economy: cross-sectional relationship ####
## set up binned econ variable:
citysumm_econ_quarter$quarterly_wages_perworker_bin <- cut(citysumm_econ_quarter$quarterly_wages_perworker,
                                                           breaks = seq(min(citysumm_econ_quarter$quarterly_wages_perworker,na.rm=T),
                                                                        max(citysumm_econ_quarter$quarterly_wages_perworker,na.rm=T),
                                                                        length.out = 31))
bin.df <- data.frame(mid=seq(min(citysumm_econ_quarter$quarterly_wages_perworker,na.rm=T),
                             max(citysumm_econ_quarter$quarterly_wages_perworker,na.rm=T),
                             length.out = 30),
                     n_total=tapply(citysumm_econ_quarter$prop_econ_gt10words, citysumm_econ_quarter$quarterly_wages_perworker_bin,
                                    function(x)sum(!is.na(x))),
                     prob_econdict10=tapply(citysumm_econ_quarter$prop_econ_gt10words,
                                            citysumm_econ_quarter$quarterly_wages_perworker_bin,
                                            mean, na.rm=T))

## Figure 2 (top panel):
pdf("Figures/Wages_vs_PropDev10_quarter_bin.pdf",width=10,height=7)
ggplot(citysumm_econ_quarter,
       aes(x=quarterly_wages_perworker,y=prop_econ_gt10words)) + 
  geom_point(aes(y=-0.01), # rugplot
             alpha=0.8,color="black",size=3,pch="l") +
  geom_point(data=bin.df,aes(x=mid,y=prob_econdict10,size=n_total),col="black") +
  geom_smooth(method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue") + 
  theme_bw() + 
  scale_size_continuous("Number of PRs\nin city-quarter") + 
  labs(x = "MSA quarterly wages per worker (thousands of $)",
       y = "Proportion of press releases\nin quarter about economic development") + 
  theme(legend.position="none",text = element_text(size=20))
dev.off()


#### Main economy analyses ####
fit_wages_dlaglog_dict10_qlevel <- felm(prop_econ_gt10words ~ quarterly_wages_perworker_deltalog 
                                        | 0 | 0 | msa_fips,
                                        weights=citysumm_econ_quarter$num,
                                        data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_qlevel) # insig pos effect

fit_wages_dlaglog_dict10_qlevel_cfe <- felm(prop_econ_gt10words ~ quarterly_wages_perworker_deltalog 
                                            | PRcity | 0 | msa_fips, 
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_qlevel_cfe) # insig pos effect

fit_wages_dlaglog_dict10_qlevel_cqfe <- felm(prop_econ_gt10words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_qlevel_cqfe) # sig pos effect


avg_econ_level <- mean(citysumm_econ_quarter$prop_econ_gt10words)
writeLines(text=as.character(round(100*abs(avg_econ_level),1)),con = "Tables/average_econ_dict10_qlevel.tex",sep="%")
(econ_effect_vs_average <- fit_wages_dlaglog_dict10_qlevel_cqfe$coefficients/avg_econ_level)
writeLines(text=as.character(round(100*abs(econ_effect_vs_average),0)),con = "Tables/econ_effect_vs_average_qlevel.tex",sep="%")
## within-city changes:
(0.084- -0.043)*fit_wages_dlaglog_dict10_qlevel_cqfe$coefficients


# city-demeaned versions
citysumm_econ_quarter <- citysumm_econ_quarter %>%
  group_by(PRcity) %>%
  mutate(prop_econ_gt10words_citymean = mean(prop_econ_gt10words,na.rm=T)) %>%
  ungroup() %>%
  mutate(prop_econ_gt10words_citydemeaned = prop_econ_gt10words - prop_econ_gt10words_citymean)
mean(citysumm_econ_quarter$prop_econ_gt10words_citydemeaned) # should be 0-ish
sd(citysumm_econ_quarter$prop_econ_gt10words_citydemeaned) # 0.085
((0.084- -0.043)*fit_wages_dlaglog_dict10_qlevel_cqfe$coefficients)/sd(citysumm_econ_quarter$prop_econ_gt10words_citydemeaned) # 0.081 of a within-city standard deviation


#### Main economy visuals (Figure 3) ####
## create fully city- and year-residualized versions of main vars:
citysumm_econ_quarter$random <- rnorm(n = nrow(citysumm_econ_quarter))
reg2.graph.outcome2 <-(felm(prop_econ_gt10words ~ random | PRcity + quarter,
                            weights=citysumm_econ_quarter$num[!is.na(citysumm_econ_quarter$quarterly_wages_perworker_deltalog) & !is.na(citysumm_econ_quarter$prop_econ_gt10words)],
                            data=citysumm_econ_quarter[!is.na(citysumm_econ_quarter$quarterly_wages_perworker_deltalog) & !is.na(citysumm_econ_quarter$prop_econ_gt10words),]))

reg2.graph.treatment2 <- (felm(quarterly_wages_perworker_deltalog ~ random | PRcity + quarter, 
                               weights=citysumm_econ_quarter$num[!is.na(citysumm_econ_quarter$quarterly_wages_perworker_deltalog) & !is.na(citysumm_econ_quarter$prop_econ_gt10words)],
                               data=citysumm_econ_quarter[!is.na(citysumm_econ_quarter$quarterly_wages_perworker_deltalog) & !is.na(citysumm_econ_quarter$prop_econ_gt10words),]))
predictions.outcome2 <- resid(reg2.graph.outcome2)
predictions.treatment2 <- resid(reg2.graph.treatment2)
data.predictions2 <- citysumm_econ_quarter[!is.na(citysumm_econ_quarter$quarterly_wages_perworker_deltalog) & !is.na(citysumm_econ_quarter$prop_econ_gt10words) & !is.na(citysumm_econ_quarter$PRcity) & !is.na(citysumm_econ_quarter$quarter),]
colnames(predictions.outcome2) <- "predictions.outcome"
colnames(predictions.treatment2) <- "predictions.treatment"
data.predictions2 <- data.frame(data.predictions2, predictions.outcome2, predictions.treatment2)


summary(data.predictions2$predictions.treatment) # -0.24 to 0.18
width=0.015
graph <- mutate(data.predictions2[!is.na(data.predictions2$quarterly_wages_perworker_deltalog) & !is.na(data.predictions2$prop_econ_gt10words),],
                bin = cut(predictions.treatment, breaks=seq(-0.25,0.2, width)))
mean_outcome <- summarise(graph, mean(predictions.outcome))
mean_outcome_raw <- summarise(graph, mean(predictions.outcome))

graph$predictions.outcome <- graph$predictions.outcome - mean(graph$predictions.outcome)
bin1.df <- data.frame(
  bin.mean=tapply(graph$predictions.outcome,
                  graph$bin, mean, na.rm=TRUE),
  mid=seq(-0.25 + width/2, 0.2 - width/2, width),
  N=tapply(graph$predictions.outcome, graph$bin, length))

pdf("Figures/WagesDiffLog_vs_ProbDev10_qlevel_2xresid.pdf",width=10,height=7)
ggplot(graph) + 
  geom_point(data=bin1.df,aes(x=mid,y=bin.mean,size=N),
             pch=19,col="black") +
  geom_smooth(data=subset(graph),
              aes(x = predictions.treatment, y = predictions.outcome),
              method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue",se = F) + 
  theme_bw() + 
  scale_x_continuous("City- and quarter-residualized change in\nlog(quarterly MSA wages per worker)") + 
  scale_y_continuous("City- and quarter-residualized proportion\nof press releases about economic growth") + 
  scale_size_continuous("Number of\nPRs in bin") + 
  theme(text = element_text(size=20),legend.position="none")
dev.off()

#### Crime: aggregation ####
citysumm_crime_month <- metainfo_crime %>%
  group_by(PRcity, PRyear, PRmonth) %>%
  summarize(
    theta_dev_45 = mean(theta_dev_45,na.rm=T),
    theta_crime_45 = mean(theta_crime_45,na.rm=T),
    prop_econ_gt5words = mean(in_econdevdict5,na.rm=T),
    prop_econ_gt6words = mean(in_econdevdict6,na.rm=T),
    prop_econ_gt7words = mean(in_econdevdict7,na.rm=T),
    prop_econ_gt8words = mean(in_econdevdict8,na.rm=T),
    prop_econ_gt9words = mean(in_econdevdict9,na.rm=T),
    prop_econ_gt10words = mean(in_econdevdict10,na.rm=T),
    prop_econ_gt11words = mean(in_econdevdict11,na.rm=T),
    prop_econ_gt12words = mean(in_econdevdict12,na.rm=T),
    prop_econ_gt13words = mean(in_econdevdict13,na.rm=T),
    prop_econ_gt14words = mean(in_econdevdict14,na.rm=T),
    prop_econ_gt15words = mean(in_econdevdict15,na.rm=T),
    
    prop_crime_gt5words = mean(in_crimedict5,na.rm=T),
    prop_crime_gt6words = mean(in_crimedict6,na.rm=T),
    prop_crime_gt7words = mean(in_crimedict7,na.rm=T),
    prop_crime_gt8words = mean(in_crimedict8,na.rm=T),
    prop_crime_gt9words = mean(in_crimedict9,na.rm=T),
    prop_crime_gt10words = mean(in_crimedict10,na.rm=T),
    prop_crime_gt11words = mean(in_crimedict11,na.rm=T),
    prop_crime_gt12words = mean(in_crimedict12,na.rm=T),
    prop_crime_gt13words = mean(in_crimedict13,na.rm=T),
    prop_crime_gt14words = mean(in_crimedict14,na.rm=T),
    prop_crime_gt15words = mean(in_crimedict15,na.rm=T),
    
    num = n()
  )




citysumm_crime_month <- left_join(citysumm_crime_month, crime_bycity,
                                  by=c("PRcity"="PRcity","PRmonth"="inc_month","PRyear"="inc_year"))
citysumm_crime_month$yearmonth <- paste0(citysumm_crime_month$PRyear,citysumm_crime_month$PRmonth)


#### Crime: cross-sectional relationship ####

# creating bins for plots
citysumm_crime_month$nviolent_rate_bin <- cut(citysumm_crime_month$nviolent_rate,
                                              breaks = seq(min(citysumm_crime_month$nviolent_rate,na.rm=T),
                                                           max(citysumm_crime_month$nviolent_rate,na.rm=T),
                                                           length.out = 31))
bin.df <- data.frame(mid=seq(min(citysumm_crime_month$nviolent_rate,na.rm=T),
                             max(citysumm_crime_month$nviolent_rate,na.rm=T),
                             length.out = 30),
                     n_total=tapply(citysumm_crime_month$prop_crime_gt10words, citysumm_crime_month$nviolent_rate_bin,
                                    function(x)sum(!is.na(x))),
                     prob_crimedict10=tapply(citysumm_crime_month$prop_crime_gt10words,
                                             citysumm_crime_month$nviolent_rate_bin,
                                             mean, na.rm=T))

## Figure 2, bottom panel
pdf("Figures/CrimeRate_vs_PropCrime10_month_bin.pdf",width=10,height=7)
ggplot(citysumm_crime_month,
       aes(x=nviolent_rate,y=prop_crime_gt10words)) + 
  geom_point(data=citysumm_crime_month,aes(x=nviolent_rate,y=-0.01),
             alpha=0.8,color="black",size=3,pch="l") +
  geom_point(data=bin.df,aes(x=mid,y=prob_crimedict10,size=n_total),col="black") +
  geom_smooth(method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue") + 
  theme_bw() + 
  scale_size_continuous("Number of PRs\nin city-month") + 
  labs(x = "Monthly violent crime rate per 100k",
       y = "Proportion of press releases\nin month about crime") + 
  theme(legend.position="none",text = element_text(size=20))
dev.off()


#### Main crime analyses ####
fit_crime_dlaglog_dict10_mlevel <- felm(prop_crime_gt10words ~ nviolent_rate_deltalog
                                        | 0 | 0 | fips_city  + yearmonth,
                                        weights=citysumm_crime_month$num,
                                        data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_mlevel) # neg effect
fit_crime_dlaglog_dict10_mlevel_cfe <- felm(prop_crime_gt10words ~ nviolent_rate_deltalog
                                            | PRcity | 0 | fips_city  + yearmonth,
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_mlevel_cfe) # neg effect
fit_crime_dlaglog_dict10_mlevel_cmfe <- felm(prop_crime_gt10words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_mlevel_cmfe) # neg effect

avg_crime_level <- mean(citysumm_crime_month$prop_crime_gt10words)
writeLines(text=as.character(round(100*abs(avg_crime_level),1)),con = "Tables/average_crime_dict10_mlevel.tex",sep="%")
(crime_effect_vs_average <- fit_crime_dlaglog_dict10_mlevel_cmfe$coefficients/avg_crime_level)
writeLines(text=as.character(round(100*abs(crime_effect_vs_average),0)),con = "Tables/crime_effect_vs_average_mlevel.tex",sep="%")

## within-city changes:
(0.112- -0.133)*fit_crime_dlaglog_dict10_mlevel_cmfe$coefficients
# city-demeaned versions
citysumm_crime_month <- citysumm_crime_month %>%
  group_by(PRcity) %>%
  mutate(prop_crime_gt10words_citymean = mean(prop_crime_gt10words,na.rm=T)) %>%
  ungroup() %>%
  mutate(prop_crime_gt10words_citydemeaned = prop_crime_gt10words - prop_crime_gt10words_citymean)
mean(citysumm_crime_month$prop_crime_gt10words_citydemeaned) # should be 0-ish
sd(citysumm_crime_month$prop_crime_gt10words_citydemeaned) # 0.089
((0.112- -0.133)*fit_crime_dlaglog_dict10_mlevel_cmfe$coefficients)/sd(citysumm_crime_month$prop_crime_gt10words_citydemeaned) # 0.042 of a within-city standard deviation


## Table 1: Effects of both economy and crime
stargazer(fit_wages_dlaglog_dict10_qlevel_cqfe,fit_crime_dlaglog_dict10_mlevel_cmfe,
          covariate.labels = c("$\\Delta$ Log(quarterly wages per worker)","$\\Delta$ Log(violent crime rate per 100k + 1)"),digits = 2,omit = "Constant",
          dep.var.caption = "Proportion of documents having $>10$ words in:",dep.var.labels = c("Economic growth dictionary","Crime dictionary"),
          notes = c("Standard errors clustered by MSA and year-quarter,","in model 1 and by city and year-month in model 2"),
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?","$\\checkmark$",""),
                           c("Month fixed effects?","","$\\checkmark$")),
          out = "Tables/fit_crimewages_difflog_dict10_qlevel.tex"
)



#### Main crime visual (Figure 4) ####
citysumm_crime_month$random <- rnorm(n = nrow(citysumm_crime_month))
reg2.graph.outcome2 <-(felm(prop_crime_gt10words ~ random | PRcity + yearmonth,
                            weights=citysumm_crime_month$num[!is.na(citysumm_crime_month$nviolent_rate_deltalog) & abs(citysumm_crime_month$nviolent_rate_deltalog)<1 & !is.na(citysumm_crime_month$prop_crime_gt10words)],
                            data=citysumm_crime_month[!is.na(citysumm_crime_month$nviolent_rate_deltalog) & abs(citysumm_crime_month$nviolent_rate_deltalog)<1  & !is.na(citysumm_crime_month$prop_crime_gt10words),]))

reg2.graph.treatment2 <- (felm(nviolent_rate_deltalog ~ random | PRcity + yearmonth, 
                               weights=citysumm_crime_month$num[!is.na(citysumm_crime_month$nviolent_rate_deltalog) & abs(citysumm_crime_month$nviolent_rate_deltalog)<1 & !is.na(citysumm_crime_month$prop_crime_gt10words)],
                               data=citysumm_crime_month[!is.na(citysumm_crime_month$nviolent_rate_deltalog) & abs(citysumm_crime_month$nviolent_rate_deltalog)<1  & !is.na(citysumm_crime_month$prop_crime_gt10words),]))
predictions.outcome2 <- resid(reg2.graph.outcome2)
predictions.treatment2 <- resid(reg2.graph.treatment2)
data.predictions2 <- citysumm_crime_month[!is.na(citysumm_crime_month$nviolent_rate_deltalog) & abs(citysumm_crime_month$nviolent_rate_deltalog)<1  & !is.na(citysumm_crime_month$prop_crime_gt10words) & !is.na(citysumm_crime_month$PRcity) & !is.na(citysumm_crime_month$yearmonth),]
colnames(predictions.outcome2) <- "predictions.outcome"
colnames(predictions.treatment2) <- "predictions.treatment"
data.predictions2 <- data.frame(data.predictions2, predictions.outcome2, predictions.treatment2)

# check regression:
summary(felm(predictions.outcome2 ~ predictions.treatment2 | 0 | 0 | fips_city + yearmonth,
             weights = data.predictions2$num,
             data=data.predictions2)) # sig neg effect


summary(data.predictions2$predictions.treatment) # -0.50 to 0.56
width=0.02
graph <- mutate(data.predictions2[!is.na(data.predictions2$nviolent_rate_deltalog) & !is.na(data.predictions2$prop_crime_gt10words),],
                bin = cut(predictions.treatment, breaks=seq(-0.57,0.57, width)))
mean_outcome <- summarise(graph, mean(predictions.outcome))
mean_outcome_raw <- summarise(graph, mean(predictions.outcome))

graph$predictions.outcome <- graph$predictions.outcome - mean(graph$predictions.outcome)
bin1.df <- data.frame(
  bin.mean=tapply(graph$predictions.outcome,
                  graph$bin, mean, na.rm=TRUE),
  mid=seq(-0.57 + width/2, 0.57 - width/2, width),
  N=tapply(graph$predictions.outcome, graph$bin, length))

pdf("Figures/CrimeRateDiffLog_vs_ProbCrime10_mlevel_2xresid.pdf",width=10,height=7)
ggplot(graph) + 
  geom_point(data=bin1.df,aes(x=mid,y=bin.mean,size=N),
             pch=19,col="black") +
  geom_smooth(data=subset(graph),
              aes(x = predictions.treatment, y = predictions.outcome),
              method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue",se=F) + 
  #     geom_smooth(data=graph,
  #                 aes(x = predictions.treatment, y = predictions.outcome),
  #                 method = 'lm', formula = y ~ splines::bs(x,5), size=1.5,col="blue") + 
  theme_bw() + 
  scale_x_continuous("City- and month-residualized change in\nlog(violent crime rate)") + 
  scale_y_continuous("City- and month-residualized proportion\nof press releases about crime") + 
  scale_size_continuous("Number of\nPRs in bin") + 
  coord_cartesian(ylim = c(-0.1,0.1)) + 
  theme(text = element_text(size=20),legend.position="none")
dev.off()


## ------------ ##
#### Appendix ####
## ------------ ##

#### Appendix A: Topic Summaries from STM ####

topicNames_45 <- c("Education (1)",
                   "Public utilities (2)",
                   "Public health & safety (3)",
                   "Awards & global issues (4)",
                   "Festivals & events (5)",
                   "Weather preparedness (6)",
                   "Technology and administration (7)",
                   "Street repair (8)",
                   "Sporting events (9)",
                   "Personnel announcements (10)",
                   "Demographic statistics (11)",
                   "Water quality/sewer (12)",
                   "Infrastructure repairs & closures (13)",
                   "Emergency preparation & management (14)",
                   "Holiday closures/calendars (15)",
                   "Crime reports (16)",
                   "Affordable housing (17)",
                   "Performing arts & events (18)",
                   "Transit/road closures (19)",
                   "Misc. announcements (20)",
                   "Fiscal affairs (21)",
                   "Voting & elections/airports (22)",
                   "Crime reports (23)",
                   "City planning (24)",
                   "Parks & public facilities (25)",
                   "Events/intergovernmental affairs (26)",
                   "Sustainability/energy (27)",
                   "Celebrations & memorials (28)",
                   "Parking restrictions & road closures (29)",
                   "Arts & culture (30)",
                   "Economic development (31)",
                   "Misc. mayoral announcements (32)",
                   "Garbage & recycling (33)",
                   "Libraries (34)",
                   "Parks & recreational programs (35)",
                   "Street & infrastructure repairs (36)",
                   "Mayoral announcements (37)",
                   "Law enforcement (38)",
                   "Permitting/licensing (39)",
                   "City council/govt boards (40)",
                   "Community/vounteer events (41)",
                   "Speech transcripts (42)",
                   "Transportation (43)",
                   "Animals and pets (44)",
                   "Legislation/intergovernmental affairs (45)"
)

frequency_45 <- colMeans(PRfit45$theta)

topictable_45 <- data.frame(label=topicNames_45,
                            highprob=apply(labelTopics(PRfit45,n=40)$prob[,1:5],1,paste,collapse=", "),
                            freq=round(frequency_45,4)*100)
colnames(topictable_45) <- c("Topic Summary Label", "Highest Probability Words","Frequency in Corpus (%)")


## Table A-1: STM topic summaries and frequencies
print(xtable(topictable_45[order(topictable_45$`Frequency in Corpus (%)`,decreasing=T),],
             caption = paste0("Topic Summary for STM, K=45"),
             label=paste0("tab:topic45_labelsummary")),
      file=paste0("Tables/topic45_labelsummary.tex"),
      include.rownames=F,floating = FALSE,
      hline.after=NULL, add.to.row=list(pos=list(-1,0,nrow(topictable_45)),
                                        command=c('\\toprule\n',
                                                  '\\midrule\n',
                                                  '\\bottomrule\n')))


#### Appendix B: Placebo Checks ####

## Economy placebo checks
# original analysis:
fit_wages_dlaglog_dict10_qlevel_cqfe <- felm(prop_econ_gt10words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips , weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_qlevel_cqfe) # sig pos effect


# set up lagged DV:
citysumm_econ_quarter <- citysumm_econ_quarter %>%
  group_by(PRcity) %>%
  mutate(prop_econ_gt10words_lag1 = lag(prop_econ_gt10words,n = 1,order_by = quarter),
         prop_econ_gt10words_lag2 = lag(prop_econ_gt10words,n = 2,order_by = quarter),
         prop_econ_gt10words_lag3 = lag(prop_econ_gt10words,n = 3,order_by = quarter),
         prop_econ_gt10words_lag4 = lag(prop_econ_gt10words,n = 4,order_by = quarter),
  )

fit_wages_dlaglog_dict10_lag1_qlevel_cqfe <- felm(prop_econ_gt10words_lag1 ~ quarterly_wages_perworker_deltalog
                                                  | PRcity + quarter | 0 | msa_fips,
                                                  weights=citysumm_econ_quarter$num,
                                                  data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_lag1_qlevel_cqfe) # insig pos effect

fit_wages_dlaglog_dict10_lag2_qlevel_cqfe <- felm(prop_econ_gt10words_lag2 ~ quarterly_wages_perworker_deltalog
                                                  | PRcity + quarter | 0 | msa_fips,
                                                  weights=citysumm_econ_quarter$num,
                                                  data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_lag2_qlevel_cqfe) # insig pos effect

fit_wages_dlaglog_dict10_lag3_qlevel_cqfe <- felm(prop_econ_gt10words_lag3 ~ quarterly_wages_perworker_deltalog
                                                  | PRcity + quarter | 0 | msa_fips,
                                                  weights=citysumm_econ_quarter$num,
                                                  data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_lag3_qlevel_cqfe) # insig pos effect

fit_wages_dlaglog_dict10_lag4_qlevel_cqfe <- felm(prop_econ_gt10words_lag4 ~ quarterly_wages_perworker_deltalog
                                                  | PRcity + quarter | 0 | msa_fips,
                                                  weights=citysumm_econ_quarter$num,
                                                  data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict10_lag4_qlevel_cqfe) # insig pos effect

## Placebo figure:
parallel_trends <- rbind(tidy(fit_wages_dlaglog_dict10_qlevel_cqfe)[1,],
                         tidy(fit_wages_dlaglog_dict10_lag1_qlevel_cqfe)[1,],
                         tidy(fit_wages_dlaglog_dict10_lag2_qlevel_cqfe)[1,],
                         tidy(fit_wages_dlaglog_dict10_lag3_qlevel_cqfe)[1,],
                         tidy(fit_wages_dlaglog_dict10_lag4_qlevel_cqfe)[1,]
)

parallel_trends$term <- seq(0,-4,-1)
parallel_trends$term[parallel_trends$term==0] <- "Current quarter"
parallel_trends$term[parallel_trends$term==-1]<-"-1 quarter"
parallel_trends$term[parallel_trends$term==-2]<-"-2 quarters"
parallel_trends$term[parallel_trends$term==-3]<-"-3 quarters"
parallel_trends$term[parallel_trends$term==-4]<-"-4 quarters"
parallel_trends$term <- factor(parallel_trends$term,levels = c("-4 quarters","-3 quarters","-2 quarters","-1 quarter","Current quarter"),ordered=T)


parallel_trends$plotorder <- c(5:1)

parallel_deltas_econ <- ggplot(data = parallel_trends, aes(x = estimate, y = plotorder)) +
  geom_errorbarh(data = parallel_trends, lwd=1, height=0.0,col="black", 
                 aes(y = plotorder, 
                     xmin = estimate + qnorm(0.025)*std.error, 
                     xmax = estimate + qnorm(0.975)*std.error)
  ) +
  geom_point(size = 3.75,col="black",shape=16) + 
  scale_x_continuous("Effect of 1% change in\nlog(MSA wages per worker)") +
  scale_y_continuous("Proportion of press releases\nabout economic growth in...",breaks=c(1:5),labels=arrange(parallel_trends,plotorder)$term,limits=c(0.9,5.1)) +
  geom_vline(xintercept = 0,size=.5,colour="black",linetype="dotted") +
  geom_hline(yintercept = 4.5, linetype="dashed") + 
  theme_bw() +
  coord_flip() + 
  theme(axis.text.y = element_text(size=10, angle = 0, hjust = 1, color="black")) +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=10)) + 
  theme(axis.title.x = element_text(size=15)) +
  theme(legend.position = "none")

ggsave("Figures/parallel_trends_deltas_econ.pdf",parallel_deltas_econ,height=3,width=7)



## Crime placebo checks
# original analysis:
fit_crime_dlaglog_dict10_mlevel_cmfe <- felm(prop_crime_gt10words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_mlevel_cmfe) # marg neg effect


## set up lagged DV:
citysumm_crime_month <- citysumm_crime_month %>%
  group_by(PRcity) %>%
  mutate(prop_crime_gt10words_lag1 = lag(prop_crime_gt10words,n = 1,order_by = yearmonth),
         prop_crime_gt10words_lag2 = lag(prop_crime_gt10words,n = 2,order_by = yearmonth),
         prop_crime_gt10words_lag3 = lag(prop_crime_gt10words,n = 3,order_by = yearmonth),
         prop_crime_gt10words_lag4 = lag(prop_crime_gt10words,n = 4,order_by = yearmonth),
  )

fit_crime_dlaglog_dict10_lag1_mlevel_cmfe <- felm(prop_crime_gt10words_lag1 ~ nviolent_rate_deltalog
                                                  | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                                  weights=citysumm_crime_month$num,
                                                  data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_lag1_mlevel_cmfe) # insig neg effect

fit_crime_dlaglog_dict10_lag2_mlevel_cmfe <- felm(prop_crime_gt10words_lag2 ~ nviolent_rate_deltalog
                                                  | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                                  weights=citysumm_crime_month$num,
                                                  data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_lag2_mlevel_cmfe) # insig pos effect

fit_crime_dlaglog_dict10_lag3_mlevel_cmfe <- felm(prop_crime_gt10words_lag3 ~ nviolent_rate_deltalog
                                                  | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                                  weights=citysumm_crime_month$num,
                                                  data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_lag3_mlevel_cmfe) # insig neg effect

fit_crime_dlaglog_dict10_lag4_mlevel_cmfe <- felm(prop_crime_gt10words_lag4 ~ nviolent_rate_deltalog
                                                  | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                                  weights=citysumm_crime_month$num,
                                                  data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict10_lag4_mlevel_cmfe) # insig pos effect

## Figure A2: Crime placebo check
parallel_trends <- rbind(tidy(fit_crime_dlaglog_dict10_mlevel_cmfe)[1,],
                         tidy(fit_crime_dlaglog_dict10_lag1_mlevel_cmfe)[1,],
                         tidy(fit_crime_dlaglog_dict10_lag2_mlevel_cmfe)[1,],
                         tidy(fit_crime_dlaglog_dict10_lag3_mlevel_cmfe)[1,],
                         tidy(fit_crime_dlaglog_dict10_lag4_mlevel_cmfe)[1,]
)
parallel_trends$term <- seq(0,-4,-1)
parallel_trends$term[parallel_trends$term==0] <- "Current month"
parallel_trends$term[parallel_trends$term==-1]<-"-1 month"
parallel_trends$term[parallel_trends$term==-2]<-"-2 months"
parallel_trends$term[parallel_trends$term==-3]<-"-3 months"
parallel_trends$term[parallel_trends$term==-4]<-"-4 months"
parallel_trends$term <- factor(parallel_trends$term,levels = c("-4 months","-3 months","-2 months","-1 month","Current month"),ordered=T)

parallel_trends$plotorder <- c(5:1)

parallel_deltas_crime <- ggplot(data = parallel_trends, aes(x = estimate, y = plotorder)) +
  geom_errorbarh(data = parallel_trends, lwd=1, height=0.0,col="black", 
                 aes(y = plotorder, 
                     xmin = estimate + qnorm(0.025)*std.error, 
                     xmax = estimate + qnorm(0.975)*std.error)
  ) +
  geom_point(size = 3.75,col="black",shape=16) + 
  scale_x_continuous("Effect of 1% change in\nlog(violent crime rate + 1)") +
  scale_y_continuous("Proportion of press releases\nabout crime in...",breaks=c(1:5),labels=arrange(parallel_trends,plotorder)$term,limits=c(0.9,5.1)) +
  geom_vline(xintercept = 0,size=.5,colour="black",linetype="dotted") +
  geom_hline(yintercept = 4.5, linetype="dashed") + 
  theme_bw() +
  coord_flip() + 
  theme(axis.text.y = element_text(size=10, angle = 0, hjust = 1, color="black")) +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=10)) + 
  theme(axis.title.x = element_text(size=15)) +
  theme(legend.position = "none")

ggsave("Figures/parallel_trends_deltas_crime.pdf",parallel_deltas_crime,height=3,width=7)




#### Appendix C: Additional Model Specifications ####

## Table A2: economy results with and without FEs
stargazer(fit_wages_dlaglog_dict10_qlevel,fit_wages_dlaglog_dict10_qlevel_cfe,fit_wages_dlaglog_dict10_qlevel_cqfe,
          covariate.labels = "$\\Delta$ Log(quarterly wages per worker)",digits = 2,omit = "Constant",
          dep.var.caption = "Proportion of documents having $>10$ words in:",dep.var.labels = "Economic growth dictionary",
          notes = "Standard errors clustered by MSA and year-quarter",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "","$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?", "","","$\\checkmark$")),
          out = "Tables/fit_wages_difflog_dict10_qlevel_withoutfes.tex"
)

## Economy analyses, other thresholds
fit_wages_dlaglog_dict5_qlevel_cqfe <- felm(prop_econ_gt5words ~ quarterly_wages_perworker_deltalog
                                            | PRcity + quarter | 0 | msa_fips,
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict5_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict6_qlevel_cqfe <- felm(prop_econ_gt6words ~ quarterly_wages_perworker_deltalog
                                            | PRcity + quarter | 0 | msa_fips,
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict6_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict7_qlevel_cqfe <- felm(prop_econ_gt7words ~ quarterly_wages_perworker_deltalog
                                            | PRcity + quarter | 0 | msa_fips,
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict7_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict8_qlevel_cqfe <- felm(prop_econ_gt8words ~ quarterly_wages_perworker_deltalog
                                            | PRcity + quarter | 0 | msa_fips,
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict8_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict9_qlevel_cqfe <- felm(prop_econ_gt9words ~ quarterly_wages_perworker_deltalog
                                            | PRcity + quarter | 0 | msa_fips,
                                            weights=citysumm_econ_quarter$num,
                                            data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict9_qlevel_cqfe) # sig pos effect

fit_wages_dlaglog_dict11_qlevel_cqfe <- felm(prop_econ_gt11words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict11_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict12_qlevel_cqfe <- felm(prop_econ_gt12words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict12_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict13_qlevel_cqfe <- felm(prop_econ_gt13words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict13_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict14_qlevel_cqfe <- felm(prop_econ_gt14words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict14_qlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict15_qlevel_cqfe <- felm(prop_econ_gt15words ~ quarterly_wages_perworker_deltalog
                                             | PRcity + quarter | 0 | msa_fips,
                                             weights=citysumm_econ_quarter$num,
                                             data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_dict15_qlevel_cqfe) # sig pos effect


## Table A4: economy effects, other thresholds
stargazer(fit_wages_dlaglog_dict15_qlevel_cqfe,
          fit_wages_dlaglog_dict14_qlevel_cqfe,
          fit_wages_dlaglog_dict13_qlevel_cqfe,
          fit_wages_dlaglog_dict12_qlevel_cqfe,
          fit_wages_dlaglog_dict11_qlevel_cqfe,
          fit_wages_dlaglog_dict10_qlevel_cqfe,
          fit_wages_dlaglog_dict9_qlevel_cqfe,
          fit_wages_dlaglog_dict8_qlevel_cqfe,
          fit_wages_dlaglog_dict7_qlevel_cqfe,
          fit_wages_dlaglog_dict6_qlevel_cqfe,
          fit_wages_dlaglog_dict5_qlevel_cqfe,
          covariate.labels = c("$\\Delta$ Log(quarterly wages per worker)"),digits = 2,omit = "Constant",
          dep.var.caption = "Proportion of documents having $x$ words in dictionary:",
          dep.var.labels = c("$>15$","$>14$","$>13$","$>12$","$>11$","$>10$","$>9$","$>8$","$>7$","$>6$","$>5$"),
          notes = "Standard errors clustered by MSA and year-quarter",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$")),
          out = "Tables/fit_wagesdifflog_dict_qlevel_all_threshold.tex"
)


thresholds <- rbind(tidy(fit_wages_dlaglog_dict5_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict6_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict7_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict8_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict9_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict10_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict11_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict12_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict13_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict14_qlevel_cqfe),
                    tidy(fit_wages_dlaglog_dict15_qlevel_cqfe)
)

thresholds$term <- seq(5,15,1)
thresholds$term <- paste0(">",thresholds$term," words")
# thresholds$term <- factor(thresholds$term,levels = c("-4 quarters","-3 quarters","-2 quarters","-1 quarter","Current quarter"),ordered=T)

thresholds$plotorder <- c(5:15)
thresholds$color <- c(rep("black",5),"red",rep("black",5))

thresholds_econ <- ggplot(data = thresholds, aes(x = estimate, y = plotorder)) +
  geom_errorbarh(data = thresholds, lwd=1, height=0.0, col=thresholds$color,
                 aes(y = plotorder, 
                     xmin = estimate + qnorm(0.025)*std.error, 
                     xmax = estimate + qnorm(0.975)*std.error)
  ) +
  geom_point(size = 3.75,col=thresholds$color,shape=16) + 
  scale_x_continuous("Effect of 1% change in\nlog(MSA wages per worker)") +
  scale_y_continuous("Threshold value of words in dictionary",
                     breaks=c(5:15),
                     labels=arrange(thresholds,plotorder)$term,limits=c(4.9,15.1)) +
  geom_vline(xintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  coord_flip() + 
  theme(axis.text.y = element_text(size=10, angle = 0, hjust = 1, color="black")) +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=10)) + 
  theme(axis.title.x = element_text(size=15)) +
  theme(legend.position = "none")

ggsave("Figures/thresholds_econ.pdf",thresholds_econ,height=3,width=9)





## Crime analyses, other thresholds
fit_crime_dlaglog_dict5_mlevel_cmfe <- felm(prop_econ_gt5words ~ nviolent_rate_deltalog
                                            | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict5_mlevel_cmfe) # insig pos effect
fit_crime_dlaglog_dict6_mlevel_cmfe <- felm(prop_econ_gt6words ~ nviolent_rate_deltalog
                                            | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict6_mlevel_cmfe) # insig pos effect
fit_crime_dlaglog_dict7_mlevel_cmfe <- felm(prop_econ_gt7words ~ nviolent_rate_deltalog
                                            | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict7_mlevel_cmfe) # insig neg effect
fit_crime_dlaglog_dict8_mlevel_cmfe <- felm(prop_econ_gt8words ~ nviolent_rate_deltalog
                                            | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict8_mlevel_cmfe) # insig neg effect
fit_crime_dlaglog_dict9_mlevel_cmfe <- felm(prop_econ_gt9words ~ nviolent_rate_deltalog
                                            | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                            weights=citysumm_crime_month$num,
                                            data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict9_mlevel_cmfe) # insig neg effect

fit_crime_dlaglog_dict11_mlevel_cmfe <- felm(prop_econ_gt11words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict11_mlevel_cmfe) # insig neg effect
fit_crime_dlaglog_dict12_mlevel_cmfe <- felm(prop_econ_gt12words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict12_mlevel_cmfe) # insig pos effect
fit_crime_dlaglog_dict13_mlevel_cmfe <- felm(prop_econ_gt13words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict13_mlevel_cmfe) # insig pos effect
fit_crime_dlaglog_dict14_mlevel_cmfe <- felm(prop_econ_gt14words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict14_mlevel_cmfe) # insig pos effect
fit_crime_dlaglog_dict15_mlevel_cmfe <- felm(prop_econ_gt15words ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | PRcity + yearmonth, 
                                             weights=citysumm_crime_month$num,
                                             data=citysumm_crime_month)
summary(fit_crime_dlaglog_dict15_mlevel_cmfe) # insig pos effect


## Table A3: results with and without fixed effects
stargazer(fit_crime_dlaglog_dict10_mlevel,fit_crime_dlaglog_dict10_mlevel_cfe,fit_crime_dlaglog_dict10_mlevel_cmfe,
          #           fit_crime_dlaglog_dict10_qlevel,fit_crime_dlaglog_dict10_qlevel_cfe,fit_crime_dlaglog_dict10_qlevel_cmfe,
          covariate.labels = "$\\Delta$ Log(violent crime rate)",digits = 2,omit = "Constant",
          dep.var.caption = "Proportion of documents having $>10$ words in:",dep.var.labels = "Crime dictionary",
          notes = "Standard errors clustered by city and year-month",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "","$\\checkmark$","$\\checkmark$"),
                           c("Month fixed effects?", "","","$\\checkmark$")),
          #           caption = "Effect of $\\Delta$ Wages on Probability(Econ Development)",
          #           label="tab:wagesdiff_dict_qlevel",
          out = "Tables/fit_crime_difflog_dict10_mlevel_withoutfes.tex"
)

## Table A5: Crime, alternative Thresholds
stargazer(fit_crime_dlaglog_dict15_mlevel_cmfe,
          fit_crime_dlaglog_dict14_mlevel_cmfe,
          fit_crime_dlaglog_dict13_mlevel_cmfe,
          fit_crime_dlaglog_dict12_mlevel_cmfe,
          fit_crime_dlaglog_dict11_mlevel_cmfe,
          fit_crime_dlaglog_dict10_mlevel_cmfe,
          fit_crime_dlaglog_dict9_mlevel_cmfe,
          fit_crime_dlaglog_dict8_mlevel_cmfe,
          fit_crime_dlaglog_dict7_mlevel_cmfe,
          fit_crime_dlaglog_dict6_mlevel_cmfe,
          fit_crime_dlaglog_dict5_mlevel_cmfe,
          covariate.labels = c("$\\Delta$ Log(violent crime rate)"),digits = 2,omit = "Constant",
          dep.var.caption = "Proportion of documents having $x$ words in dictionary:",
          dep.var.labels = c("$>15$","$>14$","$>13$","$>12$","$>11$","$>10$","$>9$","$>8$","$>7$","$>6$","$>5$"),
          notes = "Standard errors clustered by city and year-month",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Month fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$")),
          out = "Tables/fit_crimedifflog_dict_mlevel_all_threshold.tex"
)


thresholds <- rbind(tidy(fit_crime_dlaglog_dict5_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict6_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict7_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict8_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict9_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict10_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict11_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict12_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict13_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict14_mlevel_cmfe),
                    tidy(fit_crime_dlaglog_dict15_mlevel_cmfe)
)

thresholds$term <- seq(5,15,1)
thresholds$term <- paste0(">",thresholds$term," words")

thresholds$plotorder <- c(5:15)
thresholds$color <- c(rep("black",5),"red",rep("black",5))

thresholds_crime <- ggplot(data = thresholds, aes(x = estimate, y = plotorder)) +
  geom_errorbarh(data = thresholds, lwd=1, height=0.0, col=thresholds$color,
                 aes(y = plotorder, 
                     xmin = estimate + qnorm(0.025)*std.error, 
                     xmax = estimate + qnorm(0.975)*std.error)
  ) +
  geom_point(size = 3.75,col=thresholds$color,shape=16) + 
  scale_x_continuous("Effect of 1% change in\nlog(violent crime rate + 1)") +
  scale_y_continuous("Threshold value of words in dictionary",
                     breaks=c(5:15),
                     labels=arrange(thresholds,plotorder)$term,limits=c(4.9,15.1)) +
  geom_vline(xintercept = 0,size=.5,colour="black",linetype="dotted") +
  theme_bw() +
  coord_flip() + 
  theme(axis.text.y = element_text(size=10, angle = 0, hjust = 1, color="black")) +  
  # theme(axis.title.y = element_blank()) +
  theme(axis.text.x = element_text(size=10)) + 
  theme(axis.title.x = element_text(size=15)) +
  theme(legend.position = "none")

ggsave("Figures/thresholds_crime.pdf",thresholds_crime,height=3,width=9)





#### Appendix D: Results with PR-level outcomes ####

## Econ analyses, PR-level
fit_wages_dlaglog_dict10_prlevel <- felm(in_econdevdict10 ~ quarterly_wages_perworker_deltalog | 0 | 0 | msa_fips + quarter
                                         ,data=metainfo_econ)
summary(fit_wages_dlaglog_dict10_prlevel) # insig pos effect

fit_wages_dlaglog_dict10_prlevel_cfe <- felm(in_econdevdict10 ~ quarterly_wages_perworker_deltalog 
                                             | PRcity | 0 | msa_fips + quarter
                                             ,data=metainfo_econ)
summary(fit_wages_dlaglog_dict10_prlevel_cfe) # insig pos effect

fit_wages_dlaglog_dict10_prlevel_cqfe <- felm(in_econdevdict10 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter
                                              ,data=metainfo_econ)
summary(fit_wages_dlaglog_dict10_prlevel_cqfe) # sig pos effect


## Table A6: economy effect on PR-level, with and without FEs
stargazer(fit_wages_dlaglog_dict10_prlevel,fit_wages_dlaglog_dict10_prlevel_cfe,fit_wages_dlaglog_dict10_prlevel_cqfe,
          covariate.labels = c("$\\Delta$ Log(quarterly wages per worker)"),digits = 2,omit = "Constant",
          dep.var.caption = "Probability of document having",dep.var.labels="$>10$ words in dictionary",
          notes = "Standard errors clustered by MSA and year-quarter",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "","$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?", "","","$\\checkmark$")),
          #           caption = "Effect of $\\Delta$ Wages on Probability(Econ Development)",
          #           label="tab:wagesdiff_dict_prlevel",
          out = "Tables/fit_wagesdifflog_dict10_prlevel.tex"
)




## alternative thresholds, PR-level:
fit_wages_dlaglog_dict15_prlevel_cqfe <- felm(in_econdevdict15 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter,
                                              data=metainfo_econ)
summary(fit_wages_dlaglog_dict15_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict14_prlevel_cqfe <- felm(in_econdevdict14 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter,
                                              data=metainfo_econ)
summary(fit_wages_dlaglog_dict14_prlevel_cqfe) # marg pos effect
fit_wages_dlaglog_dict13_prlevel_cqfe <- felm(in_econdevdict13 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter,
                                              data=metainfo_econ)
summary(fit_wages_dlaglog_dict13_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict12_prlevel_cqfe <- felm(in_econdevdict12 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter,
                                              data=metainfo_econ)
summary(fit_wages_dlaglog_dict12_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict11_prlevel_cqfe <- felm(in_econdevdict11 ~ quarterly_wages_perworker_deltalog
                                              | PRcity  + quarter | 0 | msa_fips + quarter,
                                              data=metainfo_econ)
summary(fit_wages_dlaglog_dict11_prlevel_cqfe) # sig pos effect

fit_wages_dlaglog_dict9_prlevel_cqfe <- felm(in_econdevdict9 ~ quarterly_wages_perworker_deltalog
                                             | PRcity  + quarter | 0 | msa_fips + quarter,
                                             data=metainfo_econ)
summary(fit_wages_dlaglog_dict9_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict8_prlevel_cqfe <- felm(in_econdevdict8 ~ quarterly_wages_perworker_deltalog
                                             | PRcity  + quarter | 0 | msa_fips + quarter,
                                             data=metainfo_econ)
summary(fit_wages_dlaglog_dict8_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict7_prlevel_cqfe <- felm(in_econdevdict7 ~ quarterly_wages_perworker_deltalog
                                             | PRcity  + quarter | 0 | msa_fips + quarter,
                                             data=metainfo_econ)
summary(fit_wages_dlaglog_dict7_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict6_prlevel_cqfe <- felm(in_econdevdict6 ~ quarterly_wages_perworker_deltalog
                                             | PRcity  + quarter | 0 | msa_fips + quarter,
                                             data=metainfo_econ)
summary(fit_wages_dlaglog_dict6_prlevel_cqfe) # sig pos effect
fit_wages_dlaglog_dict5_prlevel_cqfe <- felm(in_econdevdict5 ~ quarterly_wages_perworker_deltalog
                                             | PRcity  + quarter | 0 | msa_fips + quarter,
                                             data=metainfo_econ)
summary(fit_wages_dlaglog_dict5_prlevel_cqfe) # sig pos effect



## Table A8: economy effect, alternative thresholds

stargazer(fit_wages_dlaglog_dict15_prlevel_cqfe,
          fit_wages_dlaglog_dict14_prlevel_cqfe,
          fit_wages_dlaglog_dict13_prlevel_cqfe,
          fit_wages_dlaglog_dict12_prlevel_cqfe,
          fit_wages_dlaglog_dict11_prlevel_cqfe,
          fit_wages_dlaglog_dict10_prlevel_cqfe,
          fit_wages_dlaglog_dict9_prlevel_cqfe,
          fit_wages_dlaglog_dict8_prlevel_cqfe,
          fit_wages_dlaglog_dict7_prlevel_cqfe,
          fit_wages_dlaglog_dict6_prlevel_cqfe,
          fit_wages_dlaglog_dict5_prlevel_cqfe,
          covariate.labels = c("$\\Delta$ Log(quarterly wages per worker)"),digits = 2,omit = "Constant",
          dep.var.caption = "Probability of document having $x$ words in dictionary:",
          dep.var.labels = c("$>15$","$>14$","$>13$","$>12$","$>11$","$>10$","$>9$","$>8$","$>7$","$>6$","$>5$"),
          notes = "Standard errors clustered by MSA and year-quarter",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$")),
          #           caption = "Effect of $\\Delta$ Wages on Probability(Econ Development)",
          #           label="tab:wagesdiff_dict_prlevel",
          out = "Tables/fit_wagesdifflog_dict_prlevel_all_threshold.tex"
)




## create residualized versions of wages vars:
metainfo_econ <- metainfo_econ %>%
  group_by(PRcity) %>%
  mutate(citymean_quarterly_wages_perworker = mean(quarterly_wages_perworker,na.rm=T),
         citymean_quarterly_wages_perworker_delta = mean(quarterly_wages_perworker_delta,na.rm=T),
         citymean_quarterly_wages_perworker_deltalog = mean(quarterly_wages_perworker_deltalog,na.rm=T),
         citymean_in_econdevdict10 = mean(in_econdevdict10,na.rm=T)
  )
metainfo_econ <- metainfo_econ %>%
  mutate(quarterly_wages_perworker_resid_city = quarterly_wages_perworker-citymean_quarterly_wages_perworker,
         quarterly_wages_perworker_delta_resid_city = quarterly_wages_perworker_delta-citymean_quarterly_wages_perworker_delta,
         quarterly_wages_perworker_deltalog_resid_city = quarterly_wages_perworker_deltalog-citymean_quarterly_wages_perworker_deltalog,
         in_econdevdict10_resid_city = in_econdevdict10-citymean_in_econdevdict10
  )


## create fully city- and year-residualized versions of main vars:
metainfo_econ$random <- rnorm(n = nrow(metainfo_econ))
reg2.graph.outcome2 <-(felm(in_econdevdict10 ~ random | PRcity + quarter, 
                            data=metainfo_econ[!is.na(metainfo_econ$quarterly_wages_perworker_deltalog) & !is.na(metainfo_econ$in_econdevdict10),]))

reg2.graph.treatment2 <- (felm(quarterly_wages_perworker_deltalog ~ random | PRcity + quarter, 
                               data=metainfo_econ[!is.na(metainfo_econ$quarterly_wages_perworker_deltalog) & !is.na(metainfo_econ$in_econdevdict10),]))
predictions.outcome2 <- resid(reg2.graph.outcome2)
predictions.treatment2 <- resid(reg2.graph.treatment2)
data.predictions2 <- metainfo_econ[!is.na(metainfo_econ$quarterly_wages_perworker_deltalog) & !is.na(metainfo_econ$in_econdevdict10) & !is.na(metainfo_econ$PRcity) & !is.na(metainfo_econ$quarter),]
colnames(predictions.outcome2) <- "predictions.outcome"
colnames(predictions.treatment2) <- "predictions.treatment"
data.predictions2 <- data.frame(data.predictions2, predictions.outcome2, predictions.treatment2)

summary(data.predictions2$predictions.treatment)
width=0.015
graph <- mutate(data.predictions2[!is.na(data.predictions2$quarterly_wages_perworker_deltalog) & !is.na(data.predictions2$in_econdevdict10),],
                bin = cut(predictions.treatment, breaks=seq(-0.25,0.25, width)))
mean_outcome <- summarise(graph, mean(predictions.outcome))
mean_outcome_raw <- summarise(graph, mean(predictions.outcome))

graph$predictions.outcome <- graph$predictions.outcome - mean(graph$predictions.outcome)
bin1.df <- data.frame(
  bin.mean=tapply(graph$predictions.outcome,
                  graph$bin, mean, na.rm=TRUE),
  mid=seq(-0.25 + width/2, 0.25 - width/2, width),
  N=tapply(graph$predictions.outcome, graph$bin, length))

pdf("Figures/WagesDiffLog_vs_ProbDev10_prlevel_2xresid.pdf",width=10,height=7)
ggplot(graph) + 
  geom_point(data=bin1.df,aes(x=mid,y=bin.mean,size=N),
             pch=19,col="black") +
  geom_smooth(data=subset(graph),
              aes(x = predictions.treatment, y = predictions.outcome),
              method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue") + 
  theme_bw() + 
  scale_x_continuous("City- and quarter-residualized change in\nlog(quarterly MSA wages per worker)", limits=c(-0.25,0.2)) + 
  scale_y_continuous("City- and quarter-residualized probability\nof press release about economic growth") + 
  scale_size_continuous("Number of\nPRs in bin") + 
  theme(text = element_text(size=20),legend.position="none")
dev.off()


## Crime analyses, PR-level:
# with and without FEs
fit_crime_dlaglog_dict10_prlevel <- felm(in_crimedict10 ~ nviolent_rate_deltalog
                                         | 0 | 0 | fips_city  + yearmonth
                                         ,data=metainfo_crime)
summary(fit_crime_dlaglog_dict10_prlevel) # neg effect
fit_crime_dlaglog_dict10_prlevel_cfe <- felm(in_crimedict10 ~ nviolent_rate_deltalog
                                             | PRcity | 0 | fips_city  + yearmonth
                                             ,data=metainfo_crime)
summary(fit_crime_dlaglog_dict10_prlevel_cfe) # neg effect
fit_crime_dlaglog_dict10_prlevel_cmfe <- felm(in_crimedict10 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth| 0 | fips_city  + yearmonth
                                             ,data=metainfo_crime)
summary(fit_crime_dlaglog_dict10_prlevel_cmfe) # neg effect

## Table A7: crime effect, PR-level, with and without FEs:

stargazer(fit_crime_dlaglog_dict10_prlevel,fit_crime_dlaglog_dict10_prlevel_cfe,fit_crime_dlaglog_dict10_prlevel_cmfe,
          covariate.labels = c("$\\Delta$ Log(violent crime rate per 100k + 1)"),digits = 2,omit = "Constant",
          dep.var.caption = "Probability of document having",dep.var.labels = "$>10$ words in dictionary",
          notes = "Standard errors clustered by city and year-month",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "","$\\checkmark$","$\\checkmark$"),
                           c("Month fixed effects?", "","","$\\checkmark$")),
          #           caption = "Effect of $\\Delta$ Wages on Probability(Econ Development)",
          #           label="tab:wagesdiff_dict_prlevel",
          out = "Tables/fit_crimedifflog_dict10_prlevel.tex"
)


# alternative thresholds
fit_crime_dlaglog_dict5_prlevel_cmfe <- felm(in_crimedict5 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict5_prlevel_cmfe) # insig neg effect

fit_crime_dlaglog_dict6_prlevel_cmfe <- felm(in_crimedict6 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict6_prlevel_cmfe) # sig neg effect

fit_crime_dlaglog_dict7_prlevel_cmfe <- felm(in_crimedict7 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=metainfo_crime)
summary(fit_crime_dlaglog_dict7_prlevel_cmfe) # marg neg effect

fit_crime_dlaglog_dict8_prlevel_cmfe <- felm(in_crimedict8 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict8_prlevel_cmfe) # sig neg effect

fit_crime_dlaglog_dict9_prlevel_cmfe <- felm(in_crimedict9 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict9_prlevel_cmfe) # sig neg effect


fit_crime_dlaglog_dict11_prlevel_cmfe <- felm(in_crimedict11 ~ nviolent_rate_deltalog
                                              | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                              ,data=metainfo_crime)
summary(fit_crime_dlaglog_dict11_prlevel_cmfe) # sig neg effect

fit_crime_dlaglog_dict12_prlevel_cmfe <- felm(in_crimedict12 ~ nviolent_rate_deltalog
                                              | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                              ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict12_prlevel_cmfe) # marg neg effect

fit_crime_dlaglog_dict13_prlevel_cmfe <- felm(in_crimedict13 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict13_prlevel_cmfe) # sig neg effect

fit_crime_dlaglog_dict14_prlevel_cmfe <- felm(in_crimedict14 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict14_prlevel_cmfe) # sig neg effect

fit_crime_dlaglog_dict15_prlevel_cmfe <- felm(in_crimedict15 ~ nviolent_rate_deltalog
                                             | PRcity + yearmonth | 0 | fips_city  + yearmonth
                                             ,data=subset(metainfo_crime,abs(nviolent_rate_deltalog)<0.5))
summary(fit_crime_dlaglog_dict15_prlevel_cmfe) # sig neg effect


## Table A9: crime effects, PR-level, alternative thresholds
stargazer(fit_crime_dlaglog_dict15_prlevel_cmfe,
          fit_crime_dlaglog_dict14_prlevel_cmfe,
          fit_crime_dlaglog_dict13_prlevel_cmfe,
          fit_crime_dlaglog_dict12_prlevel_cmfe,
          fit_crime_dlaglog_dict11_prlevel_cmfe,
          fit_crime_dlaglog_dict10_prlevel_cmfe,
          fit_crime_dlaglog_dict9_prlevel_cmfe,
          fit_crime_dlaglog_dict8_prlevel_cmfe,
          fit_crime_dlaglog_dict7_prlevel_cmfe,
          fit_crime_dlaglog_dict6_prlevel_cmfe,
          fit_crime_dlaglog_dict5_prlevel_cmfe,
          covariate.labels = c("$\\Delta$ Log(violent crime rate per 100k + 1)"),digits = 2,omit = "Constant",
          dep.var.caption = "Probability of document having $x$ words in dictionary:",
          dep.var.labels = c("$>15$","$>14$","$>13$","$>12$","$>11$","$>10$","$>9$","$>8$","$>7$","$>6$","$>5$"),
          notes = "Standard errors clustered by city and year-month",
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Month fixed effects?","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$")),
          out = "Tables/fit_crimedifflog_dict_prlevel_all_threshold.tex"
)


## Crime visual, PR-level
## create fully city- and year-residualized versions of main vars:
reg2.graph.outcome2 <-(felm(in_crimedict10 ~ 1 | PRcity + yearmonth, 
                            data=metainfo_crime[!is.na(metainfo_crime$nviolent_rate_deltalog) & !is.na(metainfo_crime$in_crimedict10),]))

reg2.graph.treatment2 <- (felm(nviolent_rate_deltalog ~ 1 | PRcity + yearmonth, 
                               data=metainfo_crime[!is.na(metainfo_crime$nviolent_rate_deltalog) & !is.na(metainfo_crime$in_crimedict10),]))
predictions.outcome2 <- resid(reg2.graph.outcome2)
predictions.treatment2 <- resid(reg2.graph.treatment2)
data.predictions2 <- metainfo_crime[!is.na(metainfo_crime$nviolent_rate_deltalog) & !is.na(metainfo_crime$in_crimedict10) & !is.na(metainfo_crime$PRcity) & !is.na(metainfo_crime$quarter),]
colnames(predictions.outcome2) <- "predictions.outcome"
colnames(predictions.treatment2) <- "predictions.treatment"
data.predictions2 <- data.frame(data.predictions2, predictions.outcome2, predictions.treatment2)

reg21 <- felm(in_crimedict10 ~ nviolent_rate_deltalog | PRcity + yearmonth,
              data=metainfo_crime[!is.na(metainfo_crime$nviolent_rate_deltalog) & !is.na(metainfo_crime$in_crimedict10),])
reg22 <- (lm(predictions.outcome ~ predictions.treatment, data=data.predictions2))


summary(data.predictions2$predictions.treatment)
width=0.07
graph <- mutate(data.predictions2[!is.na(data.predictions2$nviolent_rate_deltalog) & !is.na(data.predictions2$in_crimedict10),],
                bin = cut(predictions.treatment, breaks=seq(-0.95,0.95, width)))
mean_outcome <- summarise(graph, mean(predictions.outcome))
mean_outcome_raw <- summarise(graph, mean(predictions.outcome))

graph$predictions.outcome <- graph$predictions.outcome - mean(graph$predictions.outcome)
bin1.df <- data.frame(
  bin.mean=tapply(graph$predictions.outcome,
                  graph$bin, mean, na.rm=TRUE),
  mid=seq(-0.95 + width/2, 0.95 - width/2, width),
  N=tapply(graph$predictions.outcome, graph$bin, length))

pdf("Figures/CrimeRateDiffLog_vs_ProbCrime10_prlevel_2xresid.pdf",width=10,height=7)
ggplot(graph) + 
  geom_point(data=bin1.df,aes(x=mid,y=bin.mean
                              ,size=N
  ),pch=19,
  col="black") +
  geom_smooth(data=subset(graph),
              aes(x = predictions.treatment, y = predictions.outcome),
              method = 'lm', formula = y ~ poly(x, 1), size=1.5,col="blue") + 
  theme_bw() + 
  scale_x_continuous("City- and month-residualized change in\nlog(monthly violent crime rate + 1)",limits=c(-1,0.56)) + 
  scale_y_continuous("City- and month-residualized probability\nof press release about crime") + 
  scale_size_continuous("Number of\nPRs in bin") + 
  theme(text = element_text(size=20),legend.position="none")
dev.off()


#### Appendix E: Effects on Density of Press Releases ####
## placebo outcome: density of PRs:
fit_wages_dlaglog_num_qlevel_cqfe <- felm(num ~ quarterly_wages_perworker_deltalog
                                          | PRcity + quarter | 0 | msa_fips ,
                                          data=citysumm_econ_quarter)
summary(fit_wages_dlaglog_num_qlevel_cqfe) 

fit_crime_dlaglog_num_mlevel_cmfe <- felm(num ~ nviolent_rate_deltalog
                                          | PRcity + yearmonth | 0 | fips_city  + yearmonth,
                                          data=subset(citysumm_crime_month,abs(nviolent_rate_deltalog)<1))
summary(fit_crime_dlaglog_num_mlevel_cmfe) #

## Table A10: Effects on number of press releases

stargazer(fit_wages_dlaglog_num_qlevel_cqfe,fit_crime_dlaglog_num_mlevel_cmfe,
          covariate.labels = c("$\\Delta$ Log(quarterly wages per worker)","$\\Delta$ Log(violent crime rate per 100k + 1)"),digits = 2,omit = "Constant",
          dep.var.labels = "Number of press releases",
          notes = c("Standard errors clustered by MSA and year-quarter,","in model 1 and by city and year-month in model 2"),
          summary.stat = NULL, float=F,omit.stat = "ser",
          add.lines = list(c("City fixed effects?", "$\\checkmark$","$\\checkmark$"),
                           c("Quarter fixed effects?","$\\checkmark$",""),
                           c("Month fixed effects?","","$\\checkmark$")),
          out = "Tables/fit_crimewages_difflog_num_qlevel.tex"
)

